// createData1km.do
// Inputs: corelogic`state'_tax, 1km_us_buffer_coor, `state'_coor_`'ft, corelogic`state'_deed, distancefull, ZIP_COUNTY_CW_2014, Yale_Buyer_2014, Yale_Data_2014, electionEC, countyincome, countypopulation, buyer_countyincome, buyer_countypopulation, buyer_countyeducation, county_2005_2016, ECandFSfull, location_summary_full, location_detail_full, probability_depth_2050, historic_event_summary_full, adaptation_summary_full
// Outputs: 1km`state'_tax, 1kmEC_tax, cleaned`state'_deed, cleanedEC_deed, 1kmEC_td, EC, EC_mtg, EC_trans, intermediate
// Date last updated: 1/27/2025

// This .do file combines many of the above intermediate datasets, as well as other raw First Street property-level data on bare earth elevation, historic flood events, climate adaptations, and sea depth probability. It also reshapes the data to the transaction level, creates bins for elevation and distance to the coast, and consolidates some mortgage variables. 
// The raw First Street data was obtained using an API which is no longer in use; First Street data can now be accessed using RADAR.
// CoreLogic's Climate Risk database now also has bare earth elevation for each property; this was not available at the time our data was created. If replicating, you can download gr_el_used along with the property ID and merge that to the rest of the CoreLogic data.

********************************************************************************	
*  Clean CoreLogic Tax data and merge with SLR shape files
********************************************************************************
// Start timer
timer on 1

// Loop through each state importing and cleaning tax data, merging with SLR shape files
foreach state in CT DE FL GA MA MD ME NC NH NJ NY PA RI SC VA{
	use "$pathi\corelogic`state'_tax", clear
	
	* Imputes record date for missing sale date
		destring mtg_dt, replace
		destring sale_dt, replace
		destring record_dt, replace
		replace mtg_dt = . if mtg_dt == 0
		replace sale_dt = . if sale_dt == 0
		replace record_dt = . if record_dt == 0
		gen sale_dt2 = sale_dt
		replace sale_dt2 = record_dt if sale_dt2 == .
		format sale_dt2 %12.0f

	* Filter Sales Price
		keep if sale_price > 50000 & sale_price < 10000000
	
	* Drop condos
		drop if prop_type == 11
	
	* Restrict to properties within 1 km of coast - merge to coordinates within 1km and drop if no merge
		rename latitude _Y
		rename longitude _X
		geoinpoly _Y _X using "$pathi\1km_us_buffer_coor.dta", noprojection
		drop if missing(_ID)
		drop _ID
		
	* Merge with Sea Level Rise shape files
	* If lat/long in xft shape file, property inundated with x feet SLR, so dummy=1
		
		*0ft shapefile
		geoinpoly _Y _X using "$pathi\`state'_coor_0ft.dta"
		gen underwater_0ft=1
		replace underwater_0ft=0 if missing(_ID)
		drop _ID		
		*1ft shapefile
		geoinpoly _Y _X using "$pathi\`state'_coor_1ft.dta"
		gen underwater_1ft=1
		replace underwater_1ft=0 if missing(_ID)
		drop _ID	
		*2ft shapefile
		geoinpoly _Y _X using "$pathi\`state'_coor_2ft.dta"
		gen underwater_2ft=1
		replace underwater_2ft=0 if missing(_ID)
		drop _ID
		*3ft shapefile
		geoinpoly _Y _X using "$pathi\`state'_coor_3ft.dta"
		gen underwater_3ft=1
		replace underwater_3ft=0 if missing(_ID)
		drop _ID
		*4ft shapefile
		geoinpoly _Y _X using "$pathi\`state'_coor_4ft.dta"
		gen underwater_4ft=1
		replace underwater_4ft=0 if missing(_ID)
		drop _ID
		*5ft shapefile
		geoinpoly _Y _X using "$pathi\`state'_coor_5ft.dta"
		gen underwater_5ft=1
		replace underwater_5ft=0 if missing(_ID)
		drop _ID
		*6ft shapefile
		geoinpoly _Y _X using "$pathi\`state'_coor_6ft.dta"
		gen underwater_6ft=1
		replace underwater_6ft=0 if missing(_ID)
		drop _ID
	save "$pathi\1km`state'_tax.dta", replace
}
timer off 1

// Merge TAX east coast states together into a single EC file
	use "$pathi\1kmCT_tax.dta", clear
	foreach state in DE FL GA MA MD ME NC NH NJ NY PA RI SC VA{
		append using "$pathi\1km`state'_tax", force
	}
	
// Sort by property and date to keep only most recent tax year info, drop perfect duplicates, save
	sort prop_id_dw sale_dt2 tax_year
	duplicates drop
	drop if prop_id_dw == prop_id_dw[_n+1] & sale_dt2 == sale_dt2[_n+1]& tax_year < tax_year[_n+1]
	
// Temp file for merging
	tempfile 1kmEC_tax
	save `1kmEC_tax'

********************************************************************************	
*  Clean CoreLogic Deed data and append together, merging to tax
********************************************************************************
// Merge DEED east coast states together into a single EC file
// Loop through each state to impute record date, filter sales price, and save clean version
foreach state in CT DE FL GA MA MD ME NC NH NJ NY PA RI SC VA{
	use "$pathi\corelogic`state'_deed", clear
	
	* Imputes record date for missing sale date
	destring mtg_dt, replace
	destring sale_dt, replace
	destring record_dt, replace
	replace mtg_dt = . if mtg_dt == 0
	replace sale_dt = . if sale_dt == 0
	replace record_dt = . if record_dt == 0
	gen sale_dt2 = sale_dt
	replace sale_dt2 = record_dt if sale_dt2 == .
	format sale_dt2 %12.0f
	destring sale_price, replace
	
	* Filter Sales Price
	keep if sale_price > 50000 & sale_price < 10000000
	
	* Save
	save "$pathi\cleaned`state'_deed.dta", replace
}
timer off 1

// Append together each state
	use "$pathi\cleanedCT_deed.dta", clear
	foreach state in DE FL GA MA MD ME NC NH NJ NY PA RI SC VA{
		append using "$pathi\cleaned`state'_deed", force
	}
	
// Sort and save	
	sort prop_id_dw sale_dt2
	save "$pathi\cleanedEC_deed", replace

	******* DEED AND TAX **********
// Clean by destringing
	destring mtg_amt, replace
	destring fips_code, replace
	
// Merge with tax data, drop those only in tax	
	merge m:1 prop_id_dw sale_dt2 using `1kmEC_tax'
	drop if _merge == 1
	
// Order and save
	order prop_id_dw state sale_price sale_dt2 mtg_amt mtg_dt mtg_int_rate mtg_code lender_ln lender_co_code
	rename _merge _mergedeed
	save "$pathi\1kmEC_td", replace
	
// Rename lat and long for merging distance
	rename _X lon
	rename _Y lat
	sort lon lat
	gen y = round(lat, .00001)
	gen x = round(lon, .00001)
	
// Merge Distance Dataset
	* We will not have 100% match because we restricted distance calculation to sfh, >2000	
	merge m:1 x y using "$pathi\distancefull", force
	drop if _merge == 2
	rename _merge _mergedistance
	
// Keep only properties within 1000m (1km) of the coast	
	drop if near_dist > 1000


********************************************************************************	
* Data cleaning
********************************************************************************

// Rename/destring bedrooms
	rename bedrooms_num bed
	destring bed, replace
	replace bed = . if bed == 0

// Rename/destring bathrooms 
	rename totalbaths_num bath
	destring bath, replace
	replace bath = . if bath == 0 

// Destring Financial Variables and make leverage
	destring mtg_amt, replace
	destring sale_price, replace
	gen leverage = mtg_amt/sale_price

// Destring various sqft variables and replace as missing if 0 (can't have 0 sqft property)
	rename adj_gross_sqft adj_sqft
	destring adj_sqft, replace
	replace adj_sqft = . if adj_sqft == 0
	destring bldg_sqft, replace
	replace bldg_sqft = . if bldg_sqft == 0
	destring gross_sqft, replace
	replace gross_sqft = . if gross_sqft == 0

// Interpolate square footage together to have one consistent measure
	*Take B0 + B1*sqft
	gen bldg_sqft_hat = .
	reg bldg_sqft gross_sqft
	replace bldg_sqft_hat = _b[_cons] + _b[gross_sqft]*gross_sqft if gross_sqft != .
	reg bldg_sqft adj_sqft
	replace bldg_sqft_hat = _b[_cons] + _b[adj_sqft]*adj_sqft if adj_sqft != .
	replace bldg_sqft_hat = bldg_sqft if bldg_sqft != . 
	replace bldg_sqft_hat = bldg_sqft_hat/1000

// Calculate age of house as sale year - year built
	destring year_built, replace
	replace year_built = . if year_built == 0
	gen sale_year = floor(sale_dt2/10000)
	gen age = sale_year - year_built 
	replace age = . if age < 0
	
	* Get month and year of sale date
	gen sale_yearmonth = floor(sale_dt2/100)
	gen sale_month = mod(sale_yearmonth,100)

// Single Family Home and Condo Indicator
	gen condo = 0
	destring prop_type, replace
	replace condo = 1 if prop_type == 11
	gen sfh = 0 
	replace sfh = 1 if prop_type == 10	

// Destring mtg and replace as missing those over 50 years
	destring mtg_term, replace
	replace mtg_term = . if mtg_term > 50 
	replace mtg_amt = . if mtg_amt > sale_price

// Create owner occupied dummy
	gen OO = 0
	replace OO = 1 if absentee_owner_type == "O" //O = owner occupied
	replace OO = 1 if absentee_owner_type == "M" //M = property address taken from mail, assumed owner occupied
	replace OO = 1 if absentee_owner_type == "S" //S = property address taken from sales transaction - determined owner occupied

// Destring deed dataset variables
	destring mtg_int_rate, replace
	replace mtg_int_rate = . if mtg_int_rate == 0
	destring lender_co_code, replace
	destring mail_zip, replace
	destring cl_dt, replace
	destring mtg_seq_num, replace
	destring cl_seq, replace
	
// Turn resale and new construction variables into dummies	
	gen resale = 1 if resale_newconst == "M"
	replace resale = 0 if resale == .
	gen newconst = 1 if resale_newconst == "N"
	replace newconst = 0 if newconst == .
	
********************************************************************************	
* Merge in control data
********************************************************************************
// Get buyer zip code and merge in crosswalk to find buyer county
	gen buyer_zip = owner_zip
	replace buyer_zip = mail_zip if buyer_zip == ""
	gen buyer_zip_dw = substr(buyer_zip, 1, 5)
	sort buyer_zip_dw 
	merge m:1 buyer_zip_dw using "$pathi\ZIP_COUNTY_CW_2014.dta"
	rename _merge _mergezipcross
	drop if prop_id_dw == ""
	sort buyer_fips
	destring(buyer_fips), replace

// Merge in climate belief data for buyer county
	merge m:1 buyer_fips using "$pathi\Yale_Buyer_2014.dta" 
	keep if _merge == 3
	rename _merge _mergeyalebuyer
	rename happening happening_buyer
	rename worried worried_buyer
	rename timing timing_buyer

// Format fips code, merge in property county's climate beliefs
	* Change Any FIPS Code Here (For FL Just Need to Change Miami Dade)
	destring fips_code, replace
	replace fips_code = 12086 if fips_code == 12025
	sort fips_code
	merge m:1 fips_code using "$pathi\Yale_Data_2014"

// Statistics
	sum(_merge)
	drop if _merge == 2
	rename _merge _mergeyale

// Make election year variable and apply each sale year to nearest election year
	gen electionyear = .
	replace electionyear = 2000 if sale_year >= 2001 & sale_year <= 2004
	replace electionyear = 2004 if sale_year >= 2005 & sale_year <= 2008
	replace electionyear = 2008 if sale_year >= 2009 & sale_year <= 2012
	replace electionyear = 2012 if sale_year >= 2013 & sale_year <= 2016
	sort fips_code electionyear
	
// Merge in election data for share of democrat/republican votes in the county
	merge m:1 fips_code electionyear using "$pathi\electionEC"
	label variable electionyear "year of last election when property was sold (if 2004, use 2000; if 2008, use 2004, etc.)"
	label variable gopshare "share of republican vote in latest presidential election"
	rename _merge _mergeelection

// County Income data - change name of year for merging
	gen year = sale_year
	sort fips_code year
	merge m:1 fips_code year using "$pathi\countyincome"
	drop if prop_id_dw==""
	destring income, replace
	label variable income "by county, per capita income in dollars"
	rename _merge _mergeincome

// County Population data
	sort fips_code year
	merge m:1 fips_code year using "$pathi\countypopulation"
	drop if prop_id_dw==""
	destring population, replace
	label variable population "by county, population"
	rename _merge _mergepopulation 
	
********************************************************************************	
* Label variables
********************************************************************************	
// Label variables 
	label variable prop_id_dw "property ID"
	label variable state "state of property"
	label variable year_built "year property was built"
	label variable adj_sqft "adjusted gross square feet"
	label variable bed "number of bedrooms"
	label variable bath "number of bathrooms"
	label variable garage_type "type of garage"
	label variable zip_dw "property zip code, 5 digits"
	label variable zip "property zip code, 9 digits"
	label variable zoning_type "zoning type of property"
	label variable sale_dt "date of transaction for property"
	label variable sale_price "price of property in transaction"
	label variable mtg_amt "originiation amount of the mortgage"
	label variable mtg_term "length of time for mortgage to mature"
	label variable mtg_dt "origination date of the mortgage (YYYYMM)"
	label variable mtg_loan_type "type of loan (conventional, FHA, VA, etc.)"
	label variable bldg_sqft "total sqft of building"
	label variable residential_flag "flag for residential properties"
	label variable fema_panel_num "FEMA's community panel number"
	label variable land_use_type "corelogic-generate land use code"
	label variable y "latitude"
	label variable x "longitude"
	label variable absentee_owner_type "flag indicating whether owner lives at property"
	label variable owner_zip "zip code of owner"
	label variable calc_total_value "the total (land and improvement) value closest to current market value"
	label variable assd_total_value "the total assessed value of the parcel as provided by county/local taxing property"
	label variable tax_amt "the total tax amount provided by the county or local taxing authority"
	label variable tax_year "the tax year for which the taxes were billed"
	label variable trans_type "corelogic-generated code identifying the type of transaction"
	label variable gross_sqft "the size of the entire building in square feet (similar to bldg_sqft)"
	label variable fips_code "5-digit federal information processing standards code for state/county"
	label variable underwater_6ft "indicator for whether property will be underwater from 6 ft SLR"
	label variable underwater_3ft "indicator for whether property will be underwater from 3 ft SLR"
	label variable elevation "elevation of property (in feet)"
	label variable apn "apn sequence for merging"
	label variable stories_num "number of stories"
	label variable sale_dt2 "official sale date imputing mortgage date"
	label variable bldg_sqft_hat "imputed sqft measure"
	label variable sfh "single family home indicator"
	label variable county "name of county"
	label variable happening "by county, percent who believe climate change is happening 2014"
	label variable worried "by county, percent worried about climate change 2014"
	label variable leverage "mortgage amount/sales price"
	label variable sale_year "year that property was sold"
	label variable age "age of property when sold"
	label variable condo "indicator for whether property is a condo"
	label variable OO "indicator for whether property is owner occupied"
	label variable near_dist "distance of property to coast (in meters)"
	label variable mtg_int_rate "interest rate for mortgage"
	label variable mtg_code "code given to mortgage (definitions on corelogic)"
	label variable lender_ln "last name of lender"
	label variable lender_co_code "code of lender"
	label variable mtg_seq_num "mortgage sequence number (lets us see number of mortgages)"
	label variable cl_dt "closing date"
	label variable cl_seq "closing sequence number"
	label variable doc_num "documentation number"
	label variable mail_city "city of buyer"
	label variable mail_state "state of buyer"
	label variable mail_zip "zip code of buyer"
	label variable lender_zip "zip code of lender"
	label variable mtg_int_type "type of interest rate for mortgage (codes on corelogic)"
	label variable resale_newconst "indicator for whether property is resale or new construction"
	label variable equity_flag "flag indicating that transaction is equity loan (codes on corelogic)"
	label variable refi_flag "flag indicating that transaction is refinance loan (codes on corelogic)"
	label variable mtg_deed_type "type of mortgage deed"
	label variable mtg_assumpt_amt "amount of mortgage assumed from prior owner"
	label variable prim_code "primary category of transaction"
	label variable deed_code "category code providing additional deed information"
	label variable seller_carry_flag "flag indicating that transaction involved a seller carry-back mortgage"
	label variable private_lender_flag "flag indicating that the lender is a private party, not a financial institution"
	label variable foreclosure_flag "flag indicating transaction is a foreclosure and describing the type of entities involved in the transfer of the property"
	label variable cash_sale_flag "indicator distinguishing between cash sales and those where mortgages were used"
	label variable record_dt "date of record - typically matches sale/mortgage date"
	label variable buyer_corp_flag "flag indicating that the name of the property owner has been recognized as a corporation or business"
	label variable latitude "latitude value coming from deed dataset"
	label variable longitude "longitude value coming from deed dataset"
	label variable pool_flag "indicator of whether property has pool"
	label variable prop_type "type of property"
	label variable lat "latitude value used for merging"
	label variable lon "longitude value used for merging"
	label variable effective_year_built "year of last major renovation (effective year built)"
	label variable owner_corp_flag "flag indicating that the name of the property owner has been recognized as a corporation or business"
	label variable multi_apn_flag "Flag indicating that data in this field cover multiple or split parcel"
	label variable sale_code "Type of financial consideration made in sale (e.g., entire property sale, partial property sale, etc.)."
	label variable sale_yearmonth "year and month of sale"
	label variable sale_month "month of sale"
	label variable resale "indicator of whether property is resale"
	label variable newconst "indicator of whether property is new contruction"
	label variable buyer_zip "buyer zip code"
	label variable buyer_zip_dw "buyer zip code (5 digits)"
	label variable happening_buyer "by buyer's county, percent who believe climate change is happening in 2014"
	label variable worried_buyer "by buyer's county, percent who are worried about climate change in 2014"
	label variable mtg_deed "Type of mortgage deed (codes in Corelogic)"

********************************************************************************	
* Merge in First Street data (bare earth elevation and other vars)
********************************************************************************
* Merge in First Street IDs
	sort prop_id_dw
	merge m:1 prop_id_dw using "$pathi\ECandFSfull.dta", force
	rename _merge _mergefsid
	drop if prop_id_dw == ""
	label variable fsid "first street ID"

// Merge in location summary
	sort fsid
	merge m:1 fsid using "$pathr\location_summary_full"
	rename _merge _mergelocsum
	rename elevation elevation_R
	
// Merge in location detail
	merge m:1 fsid using "$pathr\location_detail_full", force
	rename _merge _mergelocdet
	
	* Clean elevation
	destring elevation, replace force 
	rename elevation elevationFS
	replace elevationFS = elevationFS/100
	
	* Drop incorrecx=t FSID matches
	drop if county_fips != fips_code & _mergelocdet == 3
	
// Merge in probability of sea depth (2050, 100 and 500 years)
	merge m:1 fsid using "$pathr\probability_depth_2050"
	rename _merge _mergeprobdepth
	drop if _mergeprobdepth == 2
	rename year floodriskyear
	destring mid100, replace force
	destring mid500, replace force
	
// Merge in summary of historic weather events, create dummy
	merge m:1 fsid using "$pathr\historic_event_summary_full"
	rename _merge _mergehistevent
	drop if _mergehistevent == 2
	replace noevent = 0 if noevent == . & _mergehistevent == 3
	
// Merge summary of climate adaptations at each property, create adaptation dummy	
	merge m:1 fsid using "$pathr\adaptation_summary_full"
	rename _merge _mergeadaptation
	drop if _mergeadaptation == 2
	replace noadaptation = 0 if noadaptation == . & _mergeadaptation == 3
	
// Label vars and save processed data
	label variable elevationFS "lowest point of elevation on property (in meters)"
	label variable floodfactor "1-10 value for flood risk calculated by FirstStreet"
	save "$pathi\EC", replace

********************************************************************************	
* Drop duplicates and reshape data to transaction level
********************************************************************************	
// In order to do some reshaping, we will need to figure out duplicates
// Drop the properties that are "duplicated" based on buyer county, since we already took care of averaging out the buyer happening/worried
	sort prop_id_dw sale_dt2 mtg_seq
	gen diff_county = 0
	replace diff_county = 1  if prop_id_dw == prop_id_dw[_n-1] & sale_dt2 == sale_dt2[_n-1] & buyer_fips != buyer_fips[_n-1]
	replace diff_county = 1 if prop_id_dw == prop_id_dw[_n+1] & sale_dt2 == sale_dt2[_n+1] & buyer_fips != buyer_fips[_n+1]
	duplicates drop prop_id_dw sale_dt2 cl_seq mtg_seq if diff_county == 1, force

// Drop duplicates of prop ID, sale date, and mortgage sequence number (since this is what we merge on)
	duplicates drop prop_id_dw sale_dt2 mtg_seq mtg_amt mtg_code lender_ln lender_co_code, force

// Now we should have individual property-mortgage records, but need to give a unique mortgage number to each transaction-mortgage pair
	bysort prop_id_dw sale_dt2: gen mtg_seq = _n
	tab mtg_seq
	* drop if more than 5 mortgages on single property at a time
	drop if mtg_seq > 5

// Currently our observations are in property-mortgage format; we will save them in this format for now
	save "$pathi\EC_mtg", replace

// Drop unnecessary variables
	drop zoning_type land_use_type mail_city mail_state mail_zip sale_dt doc_num resale_newconst record_dt adj_sqft bldg_sqft pool_flag garage_type owner_zip calc_total_value assd_total_value gross_sqft fid objectid in_fid num mtg_seq_num dup buyer_zip county diff_county

// For 62 properties, near_dist is ~1 meter off; to avoid having to include it in reshaping, we will replace the "non-matching" properties with the first
	replace near_dist = near_dist[_n-1] if prop_id_dw == prop_id_dw[_n-1] & sale_dt2 == sale_dt2[_n-1] & near_dist != near_dist[_n-1]

	* Do the same for buyer_zip 
	replace buyer_zip_dw = buyer_zip_dw[_n-1] if prop_id_dw == prop_id_dw[_n-1] & sale_dt2 == sale_dt2[_n-1] & buyer_zip_dw != buyer_zip_dw[_n-1]

	* Same thing with buyer_fips 
	replace buyer_fips = buyer_fips[_n-1] if prop_id_dw == prop_id_dw[_n-1] & sale_dt2 == sale_dt2[_n-1] & buyer_fips != buyer_fips[_n-1]

	* Same for _mergezipcross and _mergeyalebuyer 
	replace _mergezipcross = _mergezipcross[_n-1] if prop_id_dw == prop_id_dw[_n-1] & sale_dt2 == sale_dt2[_n-1] & _mergezipcross != _mergezipcross[_n-1]

// Reshape to transaction level
	reshape wide mtg_amt mtg_dt mtg_int_rate mtg_code lender_ln lender_co_code cl_dt cl_seq lender_zip mtg_int_type mtg_loan_type mtg_deed_type mtg_assumpt_amt prim_code deed_code seller_carry_flag private_lender_flag foreclosure_flag cash_sale_flag buyer_corp_flag  leverage sale_price equity_flag refi_flag resale newconst mtg_term, i(prop_id_dw sale_dt2) j(mtg_seq)
	
// Generate the continuous exposure variable from each level of SLR dummy
	gen under_cont = 0
	replace under_cont = 1 if underwater_6ft == 1
	replace under_cont = 2 if underwater_5ft == 1
	replace under_cont = 3 if underwater_4ft == 1
	replace under_cont = 4 if underwater_3ft == 1
	replace under_cont = 5 if underwater_2ft == 1
	replace under_cont = 6 if underwater_1ft == 1
	replace under_cont = 7 if underwater_0ft == 1
	label variable under_cont "from 0-7, 0 is not inundated with 6 ft of SLR and 7 is inundated with 0 ft of SLR"

// Bring most of time-invariant variables to the front
	order prop_id_dw sale_dt2 sale_yearmonth sale_year sale_month bed bath age bldg_sqft_hat stories_num year_built effective_year_built owner_corp_flag near_dist elevationFS gopshare income population sfh condo OO happening_buyer worried_buyer happening_2014 worried_2014 underwater_0ft underwater_1ft underwater_2ft underwater_3ft underwater_4ft underwater_5ft underwater_6ft under_cont sale_code
	
// Check number of mortgages
	forval i=1/5{
		gen mtg`i' = 0
		replace mtg`i'=1 if mtg_amt`i'>0&mtg_amt`i'!= .
	}
	gen num_mtg = 0
	replace num_mtg = mtg1 + mtg2 + mtg3 + mtg4 + mtg5
	drop mtg1 mtg2 mtg3 mtg4 mtg5

// Save transaction records - one record for each transaction
	save "$pathi\EC_trans", replace

********************************************************************************	
* Process data for regressions
********************************************************************************
// Transformations of sale price and mortgage
	gen ln_sale_price = log(sale_price1)
	gen asinh_mtg = asinh(mtg_amt1)
	gen ln_mtg = log(mtg_amt1)
	gen mtg_dum = mtg_amt1 > 0
	
// Dummy for non-local buyer if zip and buyer zip don't match
	gen nlb = 1 if zip_dw != buyer_zip_dw
	replace nlb = 0 if nlb == . & zip_dw != "" & buyer_zip_dw != ""

// Shift all elevations up so none are negative and create 2m elevation bins
	egen min_elevFS = min(elevationFS)
	gen elevation_adj = elevationFS - min_elevFS
	gen elev_bin2m = ceil(elevation_adj/2)

// Create new distance from coast bins
	egen min_neardist = min(near_dist)
	gen neardist_adjm = near_dist - min_neardist
	gen bin_distm = ceil(neardist_adjm/1)

// Other bins
	gen bin_dist = .
	forval i=1/9{
		replace bin_dist = `i'00 if near_dist > `i'00 & near_dist <= `i'00 + 100
	}
	forval i=0/9{
		replace bin_dist = `i'0 if near_dist > `i'0 & near_dist <= `i'0 + 10
	}

// Distance bins in miles (.01 miles, .02 miles, .08 miles, .16 miles)
	gen near_dist_mi = near_dist/1609.34
	gen bin_dist_mi = . 
	replace bin_dist_mi = 1 if near_dist_mi <= .01
	replace bin_dist_mi = 2 if near_dist_mi <= .02 & near_dist_mi > .01
	replace bin_dist_mi = 3 if near_dist_mi <= .04 & near_dist_mi > .02
	replace bin_dist_mi = 4 if near_dist_mi <= .08 & near_dist_mi > .04
	replace bin_dist_mi = 5 if near_dist_mi <= .16 & near_dist_mi > .08
	replace bin_dist_mi = 6 if near_dist_mi > .16

// Sqft percentiles
	xtile sqft_ptile = bldg_sqft_hat,nq(100)

// Age percentiles
	xtile age_ptile = age,nq(100)

// create sale quarter
	gen sale_quarter = 1 if sale_month == 1 | sale_month == 2 | sale_month == 3
	replace sale_quarter = 2 if sale_month == 4 | sale_month == 5 | sale_month == 6
	replace sale_quarter = 3 if sale_month == 7 | sale_month == 8 | sale_month == 9
	replace sale_quarter = 4 if sale_month == 10 | sale_month == 11 | sale_month == 12
	
	* Sale year + quarter
	tostring(sale_quarter), replace
	tostring(sale_year), gen (str_year)
	gen sale_yearqtr = str_year + sale_quarter
	destring(sale_yearqtr), replace force

// Keep just sample of years (after 2000)
	keep if sale_year > 2000

// Rename happening and destring zip code
	ren happening_2014 happen
	destring(zip_dw), replace

// Generate above-median-belief dummy variables
	sum happen, d
	gen above_happen = 1 if happen >= r(p50)
	replace above_happen = 0 if above_happen == . 
	replace above_happen = . if happen == .
	
	sum happening_buyer,d 
	gen above_happen_buyer = 1 if happening_buyer >= r(p50)
	replace above_happen_buyer = 0 if above_happen_buyer == . 
	replace above_happen_buyer = . if happening_buyer == .
	
	sum worried, d
	gen above_worried = 1 if worried >= r(p50)
	replace above_worried = 0 if above_worried == . 
	replace above_worried = . if worried == .
	
	sum worried_buyer, d 
	gen above_worried_buyer = 1 if worried_buyer >= r(p50)
	replace above_worried_buyer = 0 if above_worried_buyer == . 
	replace above_worried_buyer = . if worried_buyer == .
	
	sum timing_buyer, d
	gen above_timing_buyer = 1 if timing_buyer >= r(p50)
	replace above_timing_buyer = 0 if above_timing_buyer == . 
	replace above_timing_buyer = . if timing_buyer == .

// Belief quartile for happening question
	xtile quart = happening_buyer, nq(4)
	
********************************************************************************	
* Format mortgage information
********************************************************************************
// Consolidate mortgage info from various obs (pre-reshape) into one variable
	gen mtg1 = mtg_amt1
	replace mtg1 = 0 if mtg_amt1 == .
	gen mtg2 = mtg_amt2
	replace mtg2 = 0 if mtg_amt2 == .
	gen mtg3 = mtg_amt3
	replace mtg3 = 0 if mtg_amt3 == .
	gen mtg4 = mtg_amt4
	replace mtg4 = 0 if mtg_amt4 == .
	gen mtg5 = mtg_amt5
	replace mtg5 = 0 if mtg_amt5 == .
	gen mtg = mtg1 + mtg2 + mtg3 + mtg4 + mtg5 

	label variable mtg "total mortgage amount across all possible mortgages"

// Consolidate variable for mortgage term
	gen mtg_term = mtg_term1 
	replace mtg_term = mtg_term2 if mtg_term == .
	replace mtg_term = mtg_term3 if mtg_term == .
	replace mtg_term = mtg_term4 if mtg_term == .
	replace mtg_term = mtg_term5 if mtg_term == .
	replace mtg_term = . if mtg_term == 0
	replace mtg_term = . if mtg_term != . & mtg == 0

	label variable mtg_term "mortgage length, subbed in if missing from all possible mortgages"
	
// Consolidate lender code variables
	gen lender_code = lender_co_code1
	replace lender_code = lender_co_code2 if lender_code == .
	replace lender_code = lender_co_code3 if lender_code == .
	replace lender_code = lender_co_code4 if lender_code == . 
	replace lender_code = lender_co_code5 if lender_code == .

// Mortgage maturity variables - 30 years or over 30
	gen mtg_30 = 1 if mtg_term == 30
	replace mtg_30 = 0 if mtg_30 == . & mtg_term != . 
	gen mtg_over30 = 1 if mtg_term >= 30 & mtg_term != .
	replace mtg_over30 = 0 if mtg_over30 == . & mtg_term != . 

// Leverage variable is mortgage divided by sale price, missing if weird prices
	gen lvg = mtg/sale_price1 
	replace lvg = . if sale_price1 != sale_price2 & sale_price2 != .
	gen double lev  = round(lvg, .00001)
	label variable lev  "total leverage across all possible mortgages"

// Dummy if property is at/over 80% leveraged
	gen leverage_8 = 1 if lev >= .8
	replace leverage_8 = 0 if leverage_8 == .
	replace leverage_8 = . if lev == .

// Dummy if property is over 80% leveraged
	gen leverage_over8 = 1 if lev > .8
	replace leverage_over8 = 0 if leverage_over8 == .
	replace leverage_over8 = . if lev == .
	
// Dummy if property is leveraged
	gen leverage_0 = 1 if lev > 0 
	replace leverage_0 = 0 if leverage_0 == .
	replace leverage_0 = . if lev == .

// Save full intermediate-final dataset
	save "$pathf\intermediate", replace

